---
title: "Socio-Demographic Analysis"
author: "melissa_barales"
output: pdf_document
---
```{r setup, message=FALSE}
# Attach the packages the rest of the document relies on:
#   dplyr     - %>%, group_by(), summarize(), mutate(), filter(), min_rank()
#   stargazer - regression tables
library(dplyr)
library(stargazer)

# Resolve relative paths (the .rds input, the stargazer `out` files) against
# the thesis Census folder. A setwd() call inside a knitr chunk is undone as
# soon as that chunk finishes, so the root directory is set through knitr
# instead; it takes effect for every chunk after this one.
knitr::opts_knit$set(
  root.dir = path.expand("~/Desktop/Senior Thesis/Census")
)
```

```{r}
# Read the merged dataset (both ACS files combined) from the working directory.
dat <- readRDS(file = "Merged_ACS.rds")
```

```{r}
# Weighted ethnic composition of each PUMA (puma_2).
#   persons - sum of the person weights (perwt.x) in each puma_2 x ethnicity cell
#   prop    - that cell's share of the PUMA's total weighted persons
#   rank    - position of the group within its PUMA, 1 = largest share
#             (min_rank: tied groups share the smaller rank)
# summarize() drops the innermost grouping level (ethnicity), so prop and rank
# are computed within each puma_2. The table is ungrouped afterwards so that
# later filters and summaries on it are not silently applied per PUMA.
sums <- dat %>%
  dplyr::group_by(puma_2, ethnicity) %>%
  dplyr::summarize(persons = sum(perwt.x, na.rm = TRUE)) %>%
  dplyr::mutate(
    prop = persons / sum(persons, na.rm = TRUE),
    rank = dplyr::min_rank(-prop)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(puma_2, rank)
```
```{r}
# Keep the PUMA x ethnicity rows in which the group makes up at least 30%
# of the PUMA; rows with a missing prop are dropped.
prop_over30 <- sums[which(sums$prop >= 0.30), ]
```

```{r}
# Number of qualifying rows per ethnicity, with an explicit NA category.
table(prop_over30[["ethnicity"]], useNA = "always")
```

```{r}
# Hispanic enclaves: PUMAs in which Hispanics make up at least 30% of the
# weighted population. The filter does not restrict on rank; the rank table
# further down shows which ranks these rows actually hold.
Hisp_enclaves <- dplyr::filter(sums, ethnicity == "Hispanic", prop >= .30)
```

```{r}
# Save the enclave list next to the source data.
write.csv(
  x = Hisp_enclaves,
  file = "~/Desktop/Senior Thesis/Census/Hisp_enclaves.csv"
)
```

```{r}
# Distribution of the Hispanic group's within-PUMA rank among the enclaves.
table(Hisp_enclaves[["rank"]])
```

**ENCLAVE RESIDENCY VARIABLE**

```{r}
# enclave_res flags respondents who are themselves Hispanic AND whose PUMA is
# one of the Hispanic enclaves identified above. Non-Hispanic residents of an
# enclave PUMA are coded FALSE.
dat[["enclave_res"]] <- (
  dat[["ethnicity"]] == "Hispanic" &
    dat[["puma_2"]] %in% Hisp_enclaves[["puma_2"]]
)
```

```{r}
# Total weighted number of Hispanic persons across all enclave PUMAs.
sum(Hisp_enclaves[["persons"]])
```

```{r}
# Unweighted count of records by enclave residency (NA shown explicitly).
table(dat[["enclave_res"]], useNA = "always")
```
**Regression of Enclave Effect on Socio-Demographic Variables**

```{r}
# Recoding special codes of the dependent variables to NA
# Wage and salary income: 999999 and 999998 are treated as missing.
# NOTE(review): presumably the codebook's N/A and missing codes - confirm.
is.na(dat$incwage) <- which(dat$incwage == 999999 | dat$incwage == 999998)
```

```{r}
# summary() already reports NAs separately, so no NA handling is needed here.
summary(dat$incwage)
# Note that the median is lower than the mean:
# the distribution is right-skewed.
```
```{r}
# Use of food stamps: code 0 is treated as missing.
# NOTE(review): presumably 0 is the codebook's N/A category - confirm.
is.na(dat$foodstmp) <- which(dat$foodstmp == 0)
```

```{r}
# Household income: 9999999 is treated as missing.
is.na(dat$hhincome) <- which(dat$hhincome == 9999999)
# Consider using family income instead.
```

```{r}
# summary() already reports NAs separately, so no NA handling is needed here.
summary(dat$hhincome)
```

```{r}
# Recoding the education variable into a categorical variable.
# Start from an all-NA column; codes not assigned below stay NA.
dat[["education"]] <- NA
```

```{r}
# HIGHEST LEVEL OF EDUCATION RECEIVED
# educ code 0 gets no category (remains NA).
is.na(dat$education) <- which(dat$educ == 0)

# The letter prefixes keep the categories in attainment order once the
# column is converted to a factor (levels sort alphabetically).
dat$education[dat$educ %in% 1:5] <- "a - no high school degree"

dat$education[dat$educ %in% 6] <- "b - High school degree"

dat$education[dat$educ %in% 7:9] <- "c - Some college"

dat$education[dat$educ %in% 10] <- "d - College degree"

dat$education[dat$educ %in% 11] <- "e - Postgraduate"
# The number of people selecting high school and below seems low.
```

```{r}
# Share of non-missing records in each education category.
prop.table(table(dat[["education"]]))
```

```{r}
# Converting education to a factor variable;
# "a - no high school degree" becomes the reference level in the regressions.
dat$education <- factor(dat$education)
```

```{r}
# Creating a new binary employment variable (emp) from empstat:
#   empstat 1      -> 1
#   empstat 2 or 3 -> 0
#   empstat 0      -> NA
# NOTE(review): presumably 1 = employed, 2 = unemployed, 3 = not in the labor
# force, 0 = N/A - confirm against the codebook.
```

```{r}
# Start from an all-NA column; anything not assigned below stays NA.
dat[["emp"]] <- NA
```

```{r}
# empstat 0 is left missing.
is.na(dat$emp) <- which(dat$empstat == 0)
```

```{r}
dat$emp[which(dat$empstat == 1)] <- 1
```

```{r}
dat$emp[which(dat$empstat == 2 | dat$empstat == 3)] <- 0
```

```{r}
# Counts of the recoded 0/1 employment indicator (NAs not shown).
table(dat[["emp"]])
```

```{r}
# Age squared, used as the quadratic age term in the regressions below.
dat[["age_squared"]] <- dat[["age"]]^2
```

```{r}
# Recoding birthplace to numeric so it can be compared against code ranges.
# NOTE(review): if bpl were stored as a factor, as.numeric() would return the
# level positions rather than the original codes - confirm bpl's type.
dat[["bpl"]] <- as.numeric(dat[["bpl"]])
```

```{r}
# Foreign born: 0 when the birthplace code is 99 or below, 1 when above 99,
# NA when the birthplace is missing.
# NOTE(review): presumably codes up to 99 are U.S. states; check how the
# codebook numbers U.S. territories and any missing-value codes above 99.
dat$foreign <- ifelse(dat$bpl > 99, 1, 0)
```

```{r}
# Counts of native-born (0) vs. foreign-born (1) records.
table(dat[["foreign"]])
```

```{r}
# Share of records in each hcovany category.
prop.table(table(dat[["hcovany"]]))
```

```{r}
# Family income: 9999999 is treated as missing.
is.na(dat$ftotinc) <- which(dat$ftotinc == 9999999)
```

```{r}
summary(dat[["ftotinc"]])
```

```{r}
# Welfare income: the 99999 code is folded into 0 rather than set to NA.
# NOTE(review): the other income variables recode their special codes to NA;
# confirm that treating 99999 as "no welfare income" is intended.
dat$incwelfr[which(dat$incwelfr == 99999)] <- 0
```

```{r}
# Collapse to an indicator: any non-zero welfare income becomes 1.
dat$incwelfr[which(dat$incwelfr != 0)] <- 1
```

```{r}
# Counts of the 0/1 welfare-receipt indicator.
table(dat[["incwelfr"]])
```

**Wage and Salary Income**
```{r}
# OLS of wage and salary income on enclave residency, controlling for a
# quadratic in age, sex, PUMA density, education category and nativity.
# Rows with NA in any model variable are dropped by lm().
incwage <- lm(
  formula = incwage ~ enclave_res + age + age_squared +
    sex + density + education + foreign,
  data = dat
)
```

```{r}
summary(incwage)
```

```{r echo=FALSE}
# Plain-text table: left as ordinary chunk output so the ASCII layout is kept
# verbatim. (results='asis' would pass it through as markdown and garble it.)
stargazer(incwage, type = "text")
```

**Total Household Income**
```{r}
# OLS of total household income on enclave residency with the same controls
# as the wage model (quadratic in age, sex, PUMA density, education, nativity).
hincome <- lm(
  formula = hhincome ~ enclave_res + age + age_squared +
    sex + density + education + foreign,
  data = dat
)
```

```{r}
summary(hincome)
```

```{r echo=FALSE}
# Plain-text regression table for the household income model.
stargazer(hincome, type = "text")
```

**Food Stamp Use**
```{r}
# Linear model of food stamp use on enclave residency with the same controls
# as the income models. The outcome had its 0 code set to NA earlier.
# NOTE(review): if the remaining codes are 1/2 rather than 0/1, slopes still
# read as changes in the probability of use but the intercept is shifted by
# one - confirm the coding.
foodstmp <- lm(
  formula = foodstmp ~ enclave_res + age + age_squared +
    sex + density + education + foreign,
  data = dat
)
```

```{r}
summary(foodstmp)
```

```{r echo=FALSE}
# Plain-text regression table for the food stamp model.
stargazer(foodstmp, type = "text")
```


**Employment**
```{r}
# Linear probability model of employment (emp: 1/0, built from empstat above)
# on enclave residency, with the same controls as the income models.
employ <- lm(
  formula = emp ~ enclave_res + age + age_squared + sex + density +
    education + foreign,
  data = dat
)
```

```{r}
summary(employ)
```

```{r echo=FALSE}
# Plain-text table: left as ordinary chunk output so the ASCII layout is kept
# verbatim. (results='asis' would pass it through as markdown and garble it.)
stargazer(employ, type = "text")
```
```{r}
# Side-by-side table of the four outcome models. The covariate labels are
# listed in the order the coefficients appear in the models; the education
# labels cover the four non-reference categories. The table is printed as
# text and also written to the file given in `out`.
stargazer(
  incwage, hincome, foodstmp, employ,
  title = "Effect of Enclave Residency on Socio-demographic Outcomes",
  dep.var.labels = c(
    "Salary Income", "Household Income",
    "Food Stamps Use", "Employment"
  ),
  covariate.labels = c(
    "Enclave Residency", "Age", "Age Squared",
    "Sex", "PUMA Density",
    "High School Degree", "Some College",
    "College Degree",
    "Postgraduate", "Foreign-born"
  ),
  align = TRUE,
  type = "text",
  out = "combined_model.htm"
)
```

**Predicting Enclave Residency from Demographic Variables**

```{r}
# Linear model with enclave residency (the logical enclave_res flag) as the
# outcome and age, sex and education as predictors, to see how enclave
# residents differ demographically from everyone else.
# NOTE(review): the object name `diff` shadows base::diff() as a variable;
# calls to diff() still work, but a more descriptive name would be clearer.
diff <- lm(
  formula = enclave_res ~ age +
    sex + education,
  data = dat
)
```

```{r echo=FALSE}
# Plain-text table: left as ordinary chunk output so the ASCII layout is kept
# verbatim. (results='asis' would pass it through as markdown and garble it.)
# The table is also written to the file given in `out`.
stargazer(diff, type = "text", out = "new_model.htm")
```



